# Time Until Large Events
# Load the ETAS simulation catalog and the USGS observed catalog (1960-2023).
import pandas as pd
import numpy as np
import datetime as dt

csv_file = "../datasets/Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep=',', lineterminator='\n')

csv_file = "../datasets/All (1960-2023).csv"
# low_memory=False reads the file in one pass so pandas does not emit a
# DtypeWarning for mixed-type columns; the numeric columns we actually use
# are coerced explicitly during filtering below.
usgs = pd.read_csv(csv_file, sep=',', lineterminator='\n',
                   dtype={'time': str}, low_memory=False)
C:\Users\Vishal\AppData\Local\Temp\ipykernel_20100\2239509985.py:4: DtypeWarning: Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})
## Data Filtering
Converting the date columns to datetime, then filtering both catalogs to:
- Date > 1960-01-01 and < 2023-01-01
- Longitude > -123 and < -113
- Latitude > 29 and < 39
# Parse ETAS dates (two-digit years). pandas can map e.g. "62" to 2062, so
# any parsed year in the future is pushed back one century.
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")
in_future = etas["Date"].dt.year > pd.Timestamp.now().year
etas.loc[in_future, "Date"] -= pd.DateOffset(years=100)

# Normalize USGS timestamps to plain YYYY-MM-DD strings (drops time-of-day)
# and discard the raw 'time' column.
usgs["Date"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
usgs.drop(columns=["time"], inplace=True)

# Study window and region: strictly between 1960-01-01 / 2023-01-01,
# longitude in (-123, -113), latitude in (29, 39).
start = pd.to_datetime("1960-01-01")
end = pd.to_datetime("2023-01-01")

etas = etas[(etas["Date"] > start) & (etas["Date"] < end)]
etas = etas[etas["X"].between(-123, -113, inclusive="neither")]
etas = etas[etas["Y"].between(29, 39, inclusive="neither")]

usgs_dates = pd.to_datetime(usgs["Date"])
usgs = usgs[(usgs_dates > start) & (usgs_dates < end)]
for col in ("longitude", "latitude", "mag"):
    usgs[col] = pd.to_numeric(usgs[col], errors="coerce")
usgs = usgs[usgs["longitude"].between(-123, -113, inclusive="neither")]
usgs = usgs[usgs["latitude"].between(29, 39, inclusive="neither")]

# Daily maximum ETAS magnitude, one row per calendar day.
max_mag_etas = etas.groupby(etas["Date"].dt.to_period("D")).Magnitude.max().to_frame()
max_mag_etas.reset_index(inplace=True)
## Data Grouping And Merging
Data is grouped into 1-day chunks, keeping the maximum magnitude per day.
# Convert the Date column back to datetime. The original per-element Python
# loop (append pd.to_datetime(i) for each row) is replaced with one
# vectorized call — identical result, O(n) C-level work instead of
# millions of Python-level conversions.
usgs['Date'] = pd.to_datetime(usgs['Date'])

# Daily maximum observed magnitude, one row per calendar day.
max_mag_usgs = pd.DataFrame(usgs.groupby(usgs['Date'].dt.to_period('D')).mag.max())
max_mag_usgs.reset_index(inplace=True)

# Magnitude threshold above which a day counts as a "large event".
large_earthquake = 6
## Large Events
A binary Large Event label is added, along with the time difference (in days) between consecutive large events.
# Flag days whose max magnitude exceeds the threshold and compute the gap
# (in days) between consecutive large-event days.
large_mag_etas = max_mag_etas.copy()
large_mag_etas["Large Event"] = (large_mag_etas["Magnitude"] > large_earthquake).astype(int)
large_mag_etas["Date"] = large_mag_etas["Date"].dt.to_timestamp()
# diff() over only the large-event rows; all other rows stay NaN.
large_mag_etas['time_diff'] = large_mag_etas.loc[large_mag_etas['Large Event'] == 1, 'Date'].diff().dt.days
# Write via .loc on the frame itself. The original chained assignment
# (large_mag_etas['time_diff'].iloc[0] = pd.NA) triggered a
# SettingWithCopyWarning and is not guaranteed to modify the DataFrame.
large_mag_etas.loc[large_mag_etas.index[0], 'time_diff'] = pd.NA
large_mag_etas.head()
# large_mag_etas.to_csv("large_mag_etas.csv")
C:\Users\Vishal\AppData\Local\Temp\ipykernel_20100\1056532160.py:5: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
large_mag_etas['time_diff'].iloc[0] = pd.NA
| Date | Magnitude | Large Event | time_diff | |
|---|---|---|---|---|
| 0 | 1960-01-02 | 4.25 | 0 | NaN |
| 1 | 1960-01-03 | 3.90 | 0 | NaN |
| 2 | 1960-01-04 | 4.24 | 0 | NaN |
| 3 | 1960-01-05 | 3.40 | 0 | NaN |
| 4 | 1960-01-06 | 3.47 | 0 | NaN |
# Bar chart of the inter-event gaps between large ETAS events over time.
import plotly.express as px
import plotly.graph_objects as go

fig = go.Figure(
    data=[go.Bar(x=large_mag_etas['Date'], y=large_mag_etas['time_diff'])]
)
# Outline each bar in black so individual gaps stay visible.
fig.update_traces(marker_line_color='black', marker_line_width=1)
fig.update_layout(
    title='Time Difference Bar Chart (ETAS)',
    xaxis_title='Date',
    yaxis_title='Time Difference (Days)',
)
fig.show()
# Same labeling as the ETAS catalog: flag large-event days and compute the
# gap (in days) between consecutive large events.
large_mag_usgs = max_mag_usgs.copy()
large_mag_usgs["Large Event"] = (large_mag_usgs["mag"] > large_earthquake).astype(int)
large_mag_usgs["Date"] = large_mag_usgs["Date"].dt.to_timestamp()
# diff() over only the large-event rows; all other rows stay NaN.
large_mag_usgs['time_diff'] = large_mag_usgs.loc[large_mag_usgs['Large Event'] == 1, 'Date'].diff().dt.days
# .loc write instead of the original chained .iloc assignment, which raised
# a SettingWithCopyWarning and may silently write to a temporary copy.
large_mag_usgs.loc[large_mag_usgs.index[0], 'time_diff'] = pd.NA
large_mag_usgs.head()
C:\Users\Vishal\AppData\Local\Temp\ipykernel_20100\3186837181.py:5: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| Date | mag | Large Event | time_diff | |
|---|---|---|---|---|
| 0 | 1960-01-02 | 4.04 | 0 | NaN |
| 1 | 1960-01-05 | 3.03 | 0 | NaN |
| 2 | 1960-01-07 | 3.64 | 0 | NaN |
| 3 | 1960-01-08 | 3.10 | 0 | NaN |
| 4 | 1960-01-11 | 3.79 | 0 | NaN |
# Bar chart of the inter-event gaps between large USGS events over time.
fig = go.Figure(
    data=[go.Bar(x=large_mag_usgs['Date'], y=large_mag_usgs['time_diff'])]
)
# Red bars with a black outline, matching the ETAS plot's styling.
fig.update_traces(marker_color='red', marker_line_color='black', marker_line_width=1)
fig.update_layout(
    title='Time Difference Bar Chart (USGS)',
    xaxis_title='Date',
    yaxis_title='Time Difference (Days)',
)
fig.show()
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[15], line 1
----> 1 import tensorflow as tf
2 from sklearn.model_selection import train_test_split
3 from sklearn.preprocessing import StandardScaler
ModuleNotFoundError: No module named 'tensorflow'
sequence_length = 10  # LSTM window length; tune based on data and requirements


def create_sequences(data, sequence_length):
    """Return overlapping windows of `sequence_length` consecutive rows.

    Produces len(data) - sequence_length windows (callers reserve the final
    row of each window as the prediction target), stacked into one ndarray
    of shape (n_windows, sequence_length, n_features).
    """
    data = np.asarray(data)
    n_windows = len(data) - sequence_length
    if n_windows <= 0:
        # The original returned np.array([]) here (shape (0,)), which breaks
        # downstream 3-D slicing; return an empty array with the correct
        # trailing dimensions instead.
        return np.empty((0, sequence_length) + data.shape[1:], dtype=data.dtype)
    return np.stack([data[i:i + sequence_length] for i in range(n_windows)])
# Keep only large-event rows that have a defined inter-event gap, then
# standardize both feature columns to zero mean / unit variance.
# NOTE(review): after this filter 'Large Event' is constant (all 1s), so its
# scaled column is all zeros.
has_gap = large_mag_etas['time_diff'].notna()
is_large = large_mag_etas['Large Event'] == 1
large_events_etas = large_mag_etas[is_large & has_gap]
data = large_events_etas[['Large Event', 'time_diff']]
scaler = StandardScaler()
data = scaler.fit_transform(data)
print(data)
[[ 0. -0.93676124]
[ 0. 0.54943116]
[ 0. -0.53469953]
[ 0. 0.36515287]
[ 0. -1.01813088]
[ 0. -0.6687201 ]
[ 0. 2.42811237]
[ 0. 0.78636038]
[ 0. 0.11625753]
[ 0. 0.78636038]
[ 0. 1.43492421]
[ 0. -0.95112059]
[ 0. -1.09232083]
[ 0. -0.83145937]
[ 0. -1.03727667]
[ 0. -0.30255676]
[ 0. 2.34434952]
[ 0. -0.34324157]
[ 0. 0.60686854]
[ 0. 1.57373123]
[ 0. -0.57777757]
[ 0. 0.53267859]
[ 0. -0.42939765]
[ 0. -0.43179088]
[ 0. -0.65675398]
[ 0. -0.55623855]
[ 0. -1.04206312]
[ 0. 0.35079353]
[ 0. -0.91043577]
[ 0. 0.79114683]
[ 0. -0.40307218]
[ 0. -0.19007521]
[ 0. -0.43657733]
[ 0. 0.76242814]
[ 0. 3.01923882]
[ 0. -0.20443455]
[ 0. -0.65675398]
[ 0. -0.52751986]
[ 0. -0.84342549]
[ 0. -0.80034745]
[ 0. 1.53543964]
[ 0. 0.04924724]
[ 0. -0.1015259 ]
[ 0. -1.04445634]
[ 0. -0.50358761]]
# Build supervised sequences from the scaled ETAS features.
sequences = create_sequences(data, sequence_length)

# BUG FIX: the original used X = sequences[:, :, :-1], i.e. only the
# 'Large Event' column — which is constant 1 after filtering (0 after
# scaling) — so the model had no informative input and predicted a single
# constant value for every sample. Use the history of both features over the
# first sequence_length-1 timesteps instead, predicting the scaled time_diff
# at the final timestep.
X = sequences[:, :-1, :]   # (n, sequence_length-1, 2) input history
y = sequences[:, -1, -1]   # target: scaled time_diff at the last step
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Single-layer LSTM regressor; input shape follows X automatically.
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, input_shape=(X.shape[1], X.shape[2])),
    tf.keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Predict, then map back to days: rebuild a 2-column array (the 'Large Event'
# column from the last input step + the predicted time_diff) so the fitted
# scaler can invert the standardization.
predicted_time_until_next_large_event = model.predict(X_test)
predicted_time_until_next_large_event = scaler.inverse_transform(
    np.column_stack((X_test[:, -1, :1], predicted_time_until_next_large_event))
)
Epoch 1/10
1/1 [==============================] - 2s 2s/step - loss: 1.1498 - val_loss: 0.4267
Epoch 2/10
1/1 [==============================] - 0s 28ms/step - loss: 1.1486 - val_loss: 0.4338
Epoch 3/10
1/1 [==============================] - 0s 27ms/step - loss: 1.1475 - val_loss: 0.4417
Epoch 4/10
1/1 [==============================] - 0s 28ms/step - loss: 1.1465 - val_loss: 0.4504
Epoch 5/10
1/1 [==============================] - 0s 28ms/step - loss: 1.1456 - val_loss: 0.4601
Epoch 6/10
1/1 [==============================] - 0s 29ms/step - loss: 1.1449 - val_loss: 0.4709
Epoch 7/10
1/1 [==============================] - 0s 29ms/step - loss: 1.1444 - val_loss: 0.4825
Epoch 8/10
1/1 [==============================] - 0s 33ms/step - loss: 1.1442 - val_loss: 0.4943
Epoch 9/10
1/1 [==============================] - 0s 25ms/step - loss: 1.1443 - val_loss: 0.5038
Epoch 10/10
1/1 [==============================] - 0s 29ms/step - loss: 1.1447 - val_loss: 0.5089
1/1 [==============================] - 0s 319ms/step
print(predicted_time_until_next_large_event)
[[ 1. 530.0596171]
[ 1. 530.0596171]
[ 1. 530.0596171]
[ 1. 530.0596171]
[ 1. 530.0596171]
[ 1. 530.0596171]
[ 1. 530.0596171]]
# Keep only large-event rows that have a defined inter-event gap, then
# standardize both feature columns to zero mean / unit variance.
# NOTE(review): after this filter 'Large Event' is constant (all 1s), so its
# scaled column is all zeros.
has_gap = large_mag_usgs['time_diff'].notna()
is_large = large_mag_usgs['Large Event'] == 1
large_events_usgs = large_mag_usgs[is_large & has_gap]
data = large_events_usgs[['Large Event', 'time_diff']]
scaler = StandardScaler()
data = scaler.fit_transform(data)
print(data)
[[ 0. 0.65769802]
[ 0. 0.36480705]
[ 0. 0.20122433]
[ 0. -0.94541263]
[ 0. -0.76313474]
[ 0. -0.85193679]
[ 0. -0.89400092]
[ 0. -0.9080223 ]
[ 0. -0.95164435]
[ 0. 1.07989723]
[ 0. -0.60889961]
[ 0. -0.95320228]
[ 0. -0.93606505]
[ 0. 0.69041456]
[ 0. -0.39857897]
[ 0. -0.62447892]
[ 0. -0.0137701 ]
[ 0. -0.19137419]
[ 0. 0.12488573]
[ 0. 0.4738622 ]
[ 0. -0.85349472]
[ 0. -0.45310654]
[ 0. -0.57462513]
[ 0. -0.58553065]
[ 0. 1.94143288]
[ 0. 1.4241999 ]
[ 0. 2.61913272]
[ 0. 1.5410447 ]
[ 0. 1.80900878]
[ 0. -0.95320228]
[ 0. -0.46712792]]
# Build supervised sequences from the scaled USGS features.
sequences = create_sequences(data, sequence_length)

# BUG FIX (same as the ETAS model): the original X = sequences[:, :, :-1]
# fed only the constant 'Large Event' column, so the network predicted one
# constant value for every test sample. Feed the full feature history over
# the first sequence_length-1 timesteps and predict the scaled time_diff at
# the final timestep.
X = sequences[:, :-1, :]   # (n, sequence_length-1, 2) input history
y = sequences[:, -1, -1]   # target: scaled time_diff at the last step
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Single-layer LSTM regressor; input shape follows X automatically.
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, input_shape=(X.shape[1], X.shape[2])),
    tf.keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Predict, then map back to days: rebuild a 2-column array (the 'Large Event'
# column from the last input step + the predicted time_diff) so the fitted
# scaler can invert the standardization.
predicted_time_until_next_large_event = model.predict(X_test)
predicted_time_until_next_large_event = scaler.inverse_transform(
    np.column_stack((X_test[:, -1, :1], predicted_time_until_next_large_event))
)
Epoch 1/10
1/1 [==============================] - 2s 2s/step - loss: 0.8276 - val_loss: 2.4585
Epoch 2/10
1/1 [==============================] - 0s 29ms/step - loss: 0.8274 - val_loss: 2.4760
Epoch 3/10
1/1 [==============================] - 0s 32ms/step - loss: 0.8273 - val_loss: 2.4897
Epoch 4/10
1/1 [==============================] - 0s 46ms/step - loss: 0.8274 - val_loss: 2.4950
Epoch 5/10
1/1 [==============================] - 0s 40ms/step - loss: 0.8274 - val_loss: 2.4937
Epoch 6/10
1/1 [==============================] - 0s 41ms/step - loss: 0.8274 - val_loss: 2.4886
Epoch 7/10
1/1 [==============================] - 0s 28ms/step - loss: 0.8273 - val_loss: 2.4821
Epoch 8/10
1/1 [==============================] - 0s 30ms/step - loss: 0.8273 - val_loss: 2.4756
Epoch 9/10
1/1 [==============================] - 0s 33ms/step - loss: 0.8273 - val_loss: 2.4700
Epoch 10/10
1/1 [==============================] - 0s 28ms/step - loss: 0.8273 - val_loss: 2.4660
1/1 [==============================] - 0s 284ms/step
print(predicted_time_until_next_large_event)
[[ 1. 605.43713783]
[ 1. 605.43713783]
[ 1. 605.43713783]
[ 1. 605.43713783]
[ 1. 605.43713783]]